3.7 合并数据集：Concat 与 Append

本文源码请见我的GitHub

import numpy as np
import pandas as pd

def make_df(cols, ind):
    """Build a small demo DataFrame whose cells spell out their position.

    Every cell is the column label followed by the row label, so the cell
    in column ``'A'`` and row ``0`` holds ``'A0'``.

    Parameters
    ----------
    cols : iterable
        Column labels. Iterating a string such as ``'ABC'`` yields one
        column per character.
    ind : iterable
        Row labels; also passed to the DataFrame as its index.

    Returns
    -------
    pandas.DataFrame
        One column per entry of ``cols`` and one row per entry of ``ind``.
    """
    columns = {}
    for col in cols:
        columns[col] = ["{}{}".format(col, row) for row in ind]
    return pd.DataFrame(columns, index=ind)
# Example: a frame with columns A, B, C and row labels 0..3.
make_df('ABC', range(4))

	A	B	C
0	A0	B0	C0
1	A1	B1	C1
2	A2	B2	C2
3	A3	B3	C3

pd.concat() 可以简单地合并一维的 Series 或 DataFrame，与 np.concatenate() 合并数组一样。

# Two Series with disjoint integer labels, used as concat inputs.
# (The original line fused both assignments behind line-number residue,
# which is not valid Python.)
ser1 = pd.Series(['A', 'B', 'C'], index=[1, 2, 3])
ser2 = pd.Series(['D', 'E', 'F'], index=[4, 5, 6])

# Two frames with the same columns (A, B) and disjoint row labels.
df1, df2 = make_df('AB', [1, 2]), make_df('AB', [3, 4])

# Print each input followed by a blank separator, then the concatenation
# along the default axis (rows).
print(df1)
print('\n')
print(df2)
print('\n')
stacked = pd.concat([df1, df2])
print(stacked)

# Two frames with the same row labels (0, 1) but different columns.
# (The original line fused both assignments behind line-number residue,
# which is not valid Python.)
df3 = make_df('AB', [0, 1])
df4 = make_df('CD', [0, 1])

df3

	A	B
0	A0	B0
1	A1	B1

df4

	C	D
0	C0	D0
1	C1	D1

# axis=1 joins the frames side by side, so the result has columns A, B, C, D.
pd.concat([df3, df4], axis=1)

	A	B	C	D
0	A0	B0	C0	D0
1	A1	B1	C1	D1

1.索引重复

# ignore_index 用来忽略重复的索引

1
2
3

# Build two frames, then give y the same row labels as x so that both
# frames carry the labels 0 and 1.
x = make_df('AB', [0, 1])
y = make_df('AB', [2, 3])
y.index = x.index

	A	B
0	A0	B0
1	A1	B1

	A	B
0	A2	B2
1	A3	B3

# concat keeps each frame's row labels as-is, so the labels 0 and 1
# each appear twice in the result.
pd.concat([x, y])

	A	B
0	A0	B0
1	A1	B1
0	A2	B2
1	A3	B3

# 这里就有重复的索引

# Discard the original row labels and renumber the result from 0.
# (The original line had this call swallowed into the comment, so it
# never ran.)
# NOTE(review): the output shown in this article has a plain 0..3 index,
# so keys= has no visible effect alongside ignore_index=True — confirm
# whether keys= was meant for a separate example.
pd.concat([x, y], keys=['x', 'y'], ignore_index=True)

	A	B
0	A0	B0
1	A1	B1
2	A2	B2
3	A3	B3

2.类似join的合并

1
2
3

# NOTE(review): [1.2] is a single float label, not the two labels [1, 2];
# the printed output in this article matches 1.2 — confirm which was meant.
df5 = make_df('ABC', [1.2])
df6 = make_df('BCD', [3, 4])
print(df5)
print(df6)
# Default concat: the frames share only columns B and C.
combined = pd.concat([df5, df6])
print(combined)

        A     B     C
1.2  A1.2  B1.2  C1.2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4
        A     B     C    D
1.2  A1.2  B1.2  C1.2  NaN
3.0   NaN    B3    C3   D3
4.0   NaN    B4    C4   D4


D:\Software\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  This is separate from the ipykernel package so we can avoid doing imports until

# Outer join: the result keeps the union of both frames' columns.
df5 = make_df('ABC', [1.2])
df6 = make_df('BCD', [3, 4])
print(df5)
print(df6)
outer = pd.concat([df5, df6], join='outer')
print(outer)

        A     B     C
1.2  A1.2  B1.2  C1.2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4
        A     B     C    D
1.2  A1.2  B1.2  C1.2  NaN
3.0   NaN    B3    C3   D3
4.0   NaN    B4    C4   D4


D:\Software\Anaconda3\lib\site-packages\ipykernel_launcher.py:3: FutureWarning: Sorting because non-concatenation axis is not aligned. A future version
of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.

To retain the current behavior and silence the warning, pass 'sort=True'.

  This is separate from the ipykernel package so we can avoid doing imports until

# Inner join: the result keeps only the columns both frames share (B, C).
df5 = make_df('ABC', [1.2])
df6 = make_df('BCD', [3, 4])
print(df5)
print(df6)
inner = pd.concat([df5, df6], join='inner')
print(inner)

        A     B     C
1.2  A1.2  B1.2  C1.2
    B   C   D
3  B3  C3  D3
4  B4  C4  D4
        B     C
1.2  B1.2  C1.2
3.0    B3    C3
4.0    B4    C4

3.append()

# Show both inputs, separated by a blank gap.
print(df1)
print('\n')
print(df2)
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, so
# df1.append(df2) raises AttributeError on current pandas.
# pd.concat([df1, df2]) produces the same row-wise result on old and new
# versions alike.
pd.concat([df1, df2])

	A	B
1	A1	B1
2	A2	B2
3	A3	B3
4	A4	B4

1
2